import scrapy

from baidutieba.items import BaidutiebaItem


class TiebaSpider(scrapy.Spider):
    """Scrape thread rows from the Baidu Tieba listing for ``kw=python``.

    Each ``<li>`` under the ``thread_list`` element is turned into one
    ``BaidutiebaItem`` with ``reply``, ``title`` and ``author`` fields.
    After the start page is parsed, one follow-up listing request
    (``pn=50``) is scheduled.
    """

    name = 'tieba'
    allowed_domains = ['tieba.baidu.com']
    start_urls = ['https://tieba.baidu.com/f?kw=python&ie=utf-8']
    # Value of the ``pn`` query parameter used for the most recent follow-up
    # request; 0 means only the start URL has been requested so far.
    pn = 0

    def parse(self, response):
        """Yield one item per thread row, then the next listing request.

        Args:
            response: The listing-page response handed over by Scrapy.

        Yields:
            A ``BaidutiebaItem`` for every ``<li>`` in the thread list,
            followed by a ``scrapy.Request`` for the next listing page
            while ``self.pn`` is below 50.
        """
        for row in response.xpath('//*[@id="thread_list"]/li'):
            # Build a fresh item for every row. Re-using a single item object
            # would make every yielded reference point at the same data, so
            # later rows would overwrite earlier ones for any consumer that
            # still holds the item.
            item = BaidutiebaItem()
            item['reply'] = row.xpath(
                './/span[@class="threadlist_rep_num center_text"]/text()'
            ).get()
            # The trailing space in "j_th_tit " is intentional: @class is
            # compared against the full attribute value.
            item['title'] = row.xpath('.//a[@class="j_th_tit "]/text()').get()
            item['author'] = row.xpath(
                './/div[@class="threadlist_author pull_right"]/span/span/a/text()'
            ).get()
            # .get() returns None when a node is missing, so any of the three
            # fields may be None here.
            self.logger.debug(
                '%s %s %s', item['reply'], item['title'], item['author']
            )
            yield item

        if self.pn < 50:
            self.pn += 50
            # Same scheme as start_urls so the request is not sent over
            # plain HTTP.
            url = 'https://tieba.baidu.com/f?kw=python&ie=utf-8&pn=' + str(self.pn)
            yield scrapy.Request(url=url, callback=self.parse)
